package formatfa.streaming

import org.apache.spark.sql.SparkSession

/** Empty class sharing its name with the `DataProcess` object; it declares no members. */
class DataProcess
/**
 * Batch job that normalises the ratings data and orders it by timestamp.
 *
 * Reads a headered CSV using the fixed schema
 * `userId string, movieId string, rating float, timestamp long` and writes the rows back
 * out, with a header, as a single CSV part file sorted ascending by `timestamp`.
 */
object DataProcess
{
  /** Input CSV used when no input path is passed on the command line. */
  private val DefaultInputPath: String =
    "G:\\projects\\streaming_ratings\\dataSimulator\\in\\ratings.csv"

  /** Output location used when no output path is passed on the command line. */
  private val DefaultOutputPath: String =
    "G:\\projects\\streaming_ratings\\dataSimulator\\in\\ratings_sort_timestamp.csv"

  /** Explicit schema (DDL string) applied when reading the ratings CSV. */
  private val RatingsSchema: String =
    "userId string,movieId string,rating float,timestamp long"

  /**
   * Entry point.
   *
   * @param args optional overrides: `args(0)` is the input CSV path and `args(1)` is the
   *             output path. Any argument that is missing falls back to the built-in
   *             default, so running with no arguments uses the original fixed paths.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val outputPath = args.drop(1).headOption.getOrElse(DefaultOutputPath)

    val spark = SparkSession.builder().appName("process").master("local[2]").getOrCreate()
    try {
      val ratings = spark.read
        .schema(RatingsSchema)
        .option("header", true)
        .csv(inputPath)

      println(ratings)
      // printSchema() writes to stdout itself and returns Unit; wrapping it in println
      // would additionally print a stray "()".
      ratings.printSchema()

      // Collapse to one partition FIRST, then sort inside it. Sorting before repartition(1)
      // does not guarantee order: the repartition shuffle may interleave rows, and the
      // optimizer may drop a sort that sits directly under a repartition.
      // A single partition yields a single CSV part file.
      ratings
        .repartition(1)
        .sortWithinPartitions("timestamp")
        .write
        .option("header", true)
        .csv(outputPath)
    } finally {
      // Always release the local Spark context, even when the read or write fails.
      spark.stop()
    }
  }
}
